import os
import re
import pandas as pd

# Input root: each sub-directory is treated as one brand and is scanned for
# raw comment .txt files.
comments_folder = '评论'
# Output root: receives one sub-directory (holding one workbook) per brand.
processed_comments_folder = '处理过的评论'

# A run of CJK ideographs (U+4E00-U+9FA5), optional whitespace, then a
# decimal count wrapped in full-width parentheses.
comment_pattern = re.compile(r'([\u4e00-\u9fa5]+)\s*（(\d+)）')

# Walk every entry of the input root; anything that is not a directory is
# skipped.
for brand_name in os.listdir(comments_folder):
    brand_dir = os.path.join(comments_folder, brand_name)
    if not os.path.isdir(brand_dir):
        continue

    # comment text -> total count summed over all .txt files of this brand.
    totals = {}

    for entry in os.listdir(brand_dir):
        if not entry.endswith('.txt'):
            continue

        # Read the whole file as UTF-8 text, then release the handle before
        # parsing.
        with open(os.path.join(brand_dir, entry), 'r', encoding='utf-8') as handle:
            text = handle.read()

        # Accumulate each "comment（count）" occurrence found in the text.
        for hit in comment_pattern.finditer(text):
            phrase, occurrences = hit.groups()
            totals[phrase] = totals.get(phrase, 0) + int(occurrences)

    # Make sure the per-brand output directory exists.
    target_dir = os.path.join(processed_comments_folder, brand_name)
    os.makedirs(target_dir, exist_ok=True)

    # Write the aggregated counts to <brand>.xlsx; this happens even when no
    # comment matched, in which case the sheet has headers only.
    workbook_path = os.path.join(target_dir, f'{brand_name}.xlsx')
    table = pd.DataFrame(list(totals.items()), columns=['评论', '出现次数'])
    table.to_excel(workbook_path, index=False)

print("数据处理完成并保存到Excel表中。")
